Name: Makesh Srinivasan
Registration number: 19BCE1717
Course code: CSE4020
Faculty: Dr. Abdul Quadir
Slot: L31 + L32
Date: 18-October-2021 Monday
Instructions:
Perform classification with a linear support vector machine (SVM), a polynomial-kernel SVM, an RBF-kernel SVM, and a sigmoid-kernel SVM, and observe the resulting plots and accuracies.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.datasets import make_circles
import plotly.express as px
%matplotlib inline
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
Create a dataset with two features and a label. Then plot it as a scatter plot to observe the class labels.
# Generate a noisy two-class "circles" dataset and visualise it: each class
# is drawn in its own colour so the concentric structure is visible.
X, y = make_circles(n_samples=300, noise=0.1)
data = pd.DataFrame({'X0': X[:, 0], 'X1': X[:, 1], 'Y': y})
palette = {0: 'red', 1: 'blue'}
figure, axes = plt.subplots()
for label, points in data.groupby('Y'):
    points.plot(ax=axes, kind='scatter', x='X0', y='X1', label=label, color=palette[label])
plt.show()
There are two classes, class = 1 shown in blue and class = 0 shown in red
Splitting into train and test set
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3)
Creating a Linear SVM model
# Fit a linear-kernel SVM on the training split (fit returns the estimator,
# so construction and fitting can be chained).
linear = SVC(kernel='linear').fit(X_train, y_train)
SVC(kernel='linear')
Performance of SVM-linear
# Score the linear SVM on the held-out test set.
pred = linear.predict(X_test)
accuracy = accuracy_score(y_test, pred)
print("Accuracy = ", accuracy)
Accuracy = 0.4777777777777778
# Engineer two quadratic features so the circular classes become separable
# by a polynomial/RBF boundary in the lifted space.
# NOTE(review): 'X0**2/2*4.43' parses as ((X0**2)/2)*4.43 — confirm the
# intended scaling factors.
squared = data[['X0', 'X1']] ** 2
data['X2'] = squared['X0'] / 2 * 4.43
data['X3'] = squared['X1'] / 1.3
data.head(4)
| X0 | X1 | Y | X2 | X3 | |
|---|---|---|---|---|---|
| 0 | 1.065865 | -0.400342 | 0 | 2.516393 | 0.123287 |
| 1 | -0.602956 | -0.964063 | 0 | 0.805277 | 0.714936 |
| 2 | -0.723526 | -0.539641 | 1 | 1.159531 | 0.224010 |
| 3 | 0.962780 | 0.161376 | 0 | 2.053185 | 0.020032 |
# Separate the feature columns from the label column.
X = data.drop(columns=['Y'])
y = data['Y']
Splitting into train and test set
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3)
Plotting
# Plot every 3-feature combination (X0..X3, i < j < k) as an interactive
# 3D scatter coloured by class label.
for i in range(0, 4):
    for j in range(i + 1, 4):
        for k in range(j + 1, 4):
            axis_x = f"X{i}"
            axis_y = f"X{j}"
            axis_z = f"X{k}"
            fig3d = px.scatter_3d(data, x=axis_x, y=axis_y, z=axis_z, color='Y')
            fig3d.show()
Creating the SVM-polynomial model
# Fit a polynomial-kernel SVM on the lifted feature set.
poly = SVC(kernel='poly').fit(X_train, y_train)
SVC(kernel='poly')
Performance measure of SVM polynomial
# Score the polynomial SVM on the held-out test set.
pred = poly.predict(X_test)
accuracy = accuracy_score(y_test, pred)
print("Accuracy = ", accuracy)
Accuracy = 0.8222222222222222
Creating the SVM model with the (default) RBF kernel
# Fit an RBF-kernel SVM (the SVC default kernel) on the lifted features.
rbf = SVC(kernel='rbf').fit(X_train, y_train)
SVC()
Performance measure
# Score the RBF SVM on the held-out test set.
pred = rbf.predict(X_test)
accuracy = accuracy_score(y_test, pred)
print("Accuracy = ", accuracy)
Accuracy = 0.8111111111111111
Creating the model
# Fit a sigmoid-kernel SVM on the lifted features.
sig = SVC(kernel='sigmoid').fit(X_train, y_train)
SVC(kernel='sigmoid')
Performance measure
# Score the sigmoid SVM on the held-out test set.
pred = sig.predict(X_test)
accuracy = accuracy_score(y_test, pred)
print("Accuracy = ", accuracy)
Accuracy = 0.4
def load(URL_):
    """Read the car-evaluation CSV (which has no header row) from *URL_*
    and attach the known column names.

    Returns the labelled DataFrame.
    """
    frame = pd.read_csv(URL_, header=None)
    frame.columns = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety', 'outcome']
    print("Data loaded...")
    return frame
# Fetch the UCI car-evaluation dataset and preview the first rows.
CAR_DATA_URL = 'https://archive.ics.uci.edu/ml/machine-learning-databases/car/car.data'
data = load(CAR_DATA_URL)
data.head()
Data loaded...
| buying | maint | doors | persons | lug_boot | safety | outcome | |
|---|---|---|---|---|---|---|---|
| 0 | vhigh | vhigh | 2 | 2 | small | low | unacc |
| 1 | vhigh | vhigh | 2 | 2 | small | med | unacc |
| 2 | vhigh | vhigh | 2 | 2 | small | high | unacc |
| 3 | vhigh | vhigh | 2 | 2 | med | low | unacc |
| 4 | vhigh | vhigh | 2 | 2 | med | med | unacc |
# Integer-encode every categorical column in place. `apply` calls
# fit_transform once per column, so the encoder is refitted each time.
enc = LabelEncoder()
cat_cols = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety', 'outcome']
data.loc[:, cat_cols] = data.loc[:, cat_cols].apply(enc.fit_transform)
data.head()
| buying | maint | doors | persons | lug_boot | safety | outcome | |
|---|---|---|---|---|---|---|---|
| 0 | 3 | 3 | 0 | 0 | 2 | 1 | 2 |
| 1 | 3 | 3 | 0 | 0 | 2 | 2 | 2 |
| 2 | 3 | 3 | 0 | 0 | 2 | 0 | 2 |
| 3 | 3 | 3 | 0 | 0 | 1 | 1 | 2 |
| 4 | 3 | 3 | 0 | 0 | 1 | 2 | 2 |
The dataset has four outcome classes. To simplify the classification task, keep only two of them — unacc and acc (encoded as 2 and 0 respectively).
# Restrict the data to the two encoded outcome classes of interest
# (0 and 2 — presumably 'acc' and 'unacc' after label encoding; verify).
keep = data["outcome"].isin([0, 2])
data = data.loc[keep, :]
data
| buying | maint | doors | persons | lug_boot | safety | outcome | |
|---|---|---|---|---|---|---|---|
| 0 | 3 | 3 | 0 | 0 | 2 | 1 | 2 |
| 1 | 3 | 3 | 0 | 0 | 2 | 2 | 2 |
| 2 | 3 | 3 | 0 | 0 | 2 | 0 | 2 |
| 3 | 3 | 3 | 0 | 0 | 1 | 1 | 2 |
| 4 | 3 | 3 | 0 | 0 | 1 | 2 | 2 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 1716 | 1 | 1 | 3 | 1 | 0 | 1 | 2 |
| 1719 | 1 | 1 | 3 | 2 | 2 | 1 | 2 |
| 1720 | 1 | 1 | 3 | 2 | 2 | 2 | 0 |
| 1722 | 1 | 1 | 3 | 2 | 1 | 1 | 2 |
| 1725 | 1 | 1 | 3 | 2 | 0 | 1 | 2 |
1594 rows × 7 columns
# Plot sliding windows of three consecutive feature columns as 3D
# scatters coloured by outcome.
cols = ["buying", "maint", "doors", "persons", "lug_boot", "safety"]
for start in range(4):
    axis_x, axis_y, axis_z = cols[start:start + 3]
    fig = px.scatter_3d(data, x=axis_x, y=axis_y, z=axis_z, color='outcome')
    fig.show()
# Separate features from the target, coercing the target to int so the
# classifier sees integer class labels.
X = data.drop(columns=['outcome'])
y = data['outcome'].astype('int')
X.head()
| buying | maint | doors | persons | lug_boot | safety | |
|---|---|---|---|---|---|---|
| 0 | 3 | 3 | 0 | 0 | 2 | 1 |
| 1 | 3 | 3 | 0 | 0 | 2 | 2 |
| 2 | 3 | 3 | 0 | 0 | 2 | 0 |
| 3 | 3 | 3 | 0 | 0 | 1 | 1 |
| 4 | 3 | 3 | 0 | 0 | 1 | 2 |
# Fresh 70/30 split on the encoded car data, then a polynomial-kernel SVM.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
poly = SVC(kernel='poly').fit(X_train, y_train)
SVC(kernel='poly')
Performance measure
# Score the polynomial SVM on the car-data test set.
pred = poly.predict(X_test)
accuracy = accuracy_score(y_test, pred)
print("Accuracy = ", accuracy)
Accuracy = 0.8851774530271399
# Fit an RBF-kernel SVM on the car training data.
rbf = SVC(kernel='rbf').fit(X_train, y_train)
SVC()
Performance measure
# Score the RBF SVM on the car-data test set.
pred = rbf.predict(X_test)
accuracy = accuracy_score(y_test, pred)
print("Accuracy = ", accuracy)
Accuracy = 0.9436325678496869
# Fit a sigmoid-kernel SVM on the car training data.
sig = SVC(kernel='sigmoid').fit(X_train, y_train)
SVC(kernel='sigmoid')
Performance measure
# Score the sigmoid SVM on the car-data test set.
pred = sig.predict(X_test)
accuracy = accuracy_score(y_test, pred)
print("Accuracy = ", accuracy)
Accuracy = 0.6534446764091858